### Replication Code for:
# "Preferences over Foreign Migration: Testing Existing Explanations in the Gulf"
# World Politics 2022
# All figures and tables generated in this file use data from:
# "Qatari's Attitudes Towards Foreign Workers" survey (2012)
# collected by SESRI at Qatar University

### Author Erin York
### File updated February 3, 2022
### File created using R version 4.1.1 (2021-08-10) -- "Kick Things"


rm(list = ls())

library(foreign)
library(tidyverse)
library(RItools)
library(xtable)
library(RColorBrewer)
library(stargazer)
library(mediation)
library(MASS)


# 1 Process raw survey data -----------------------------------------------

# set working directory as appropriate
setwd("YOUR_FILEPATH_HERE")

# read in data - adjust filepath as necessary
qafdata<- read.dta("Qatari’s Attitudes Towards Foreign Workers 2012.dta")


adjustedqaf<- tbl_df(qafdata)

# code white collar treatment
treat<- rep(NA, length=nrow(adjustedqaf))
treat[adjustedqaf$migrant=="1. White-collar workers"]<- 1
treat[adjustedqaf$migrant=="2. Blue-collar workers"]<- 0
# note that 1/3 of total survey sample was not included in the framing experiment
# no outcome measures were collected for them
table(treat, useNA = "ifany")

# code main outcome variable to numeric, increasing in support for migration
# neg. values - decreasing migration; positive - increasing migration
y.main<- rep(NA, length=nrow(adjustedqaf))
y.main[adjustedqaf$att1=="1. Increased a lot"]<- 2
y.main[adjustedqaf$att1=="2. Increased a little"]<- 1
y.main[adjustedqaf$att1=="3. Remain the same as it is"]<- 0
y.main[adjustedqaf$att1=="4. Reduced a little"]<- -1
y.main[adjustedqaf$att1=="5. Reduced a lot"]<- -2

## attitudinal mediators toward migrants
# lower values = more negative

### ECON outcomes - welfare
# use healthcare services
y.health<- rep(NA, length=nrow(adjustedqaf))
y.health[adjustedqaf$att6=="1. Very true"]<- 1
y.health[adjustedqaf$att6=="2. Somewhat true"]<- 2
y.health[adjustedqaf$att6=="3. Somewhat false"]<- 3
y.health[adjustedqaf$att6=="4. Very false"]<- 4

# use resources
y.res<- rep(NA, length=nrow(adjustedqaf))
y.res[adjustedqaf$att5=="1. Very true"]<- 1
y.res[adjustedqaf$att5=="2. Somewhat true"]<- 2
y.res[adjustedqaf$att5=="3. Somewhat false"]<- 3
y.res[adjustedqaf$att5=="4. Very false"]<- 4

# use social services
y.soc<- rep(NA, length=nrow(adjustedqaf))
y.soc[adjustedqaf$att16=="1. Very true"]<- 1
y.soc[adjustedqaf$att16=="2. Somewhat true"]<- 2
y.soc[adjustedqaf$att16=="3. Somewhat false"]<- 3
y.soc[adjustedqaf$att16=="4. Very false"]<- 4

### ECON outcomes - employment

# take jobs
y.jobs<- rep(NA, length=nrow(adjustedqaf))
y.jobs[adjustedqaf$att18=="1. Very true"]<- 1
y.jobs[adjustedqaf$att18=="2. Somewhat true"]<- 2
y.jobs[adjustedqaf$att18=="3. Somewhat false"]<- 3
y.jobs[adjustedqaf$att18=="4. Very false"]<- 4


### CULTURAL outcomes

# threaten culture
y.cult<- rep(NA, length=nrow(adjustedqaf))
y.cult[adjustedqaf$att10=="1. Very true"]<- 1
y.cult[adjustedqaf$att10=="2. Somewhat true"]<- 2
y.cult[adjustedqaf$att10=="3. Somewhat false"]<- 3
y.cult[adjustedqaf$att10=="4. Very false"]<- 4

# threaten culture (nonarab)
y.nonarab<- rep(NA, length=nrow(adjustedqaf))
y.nonarab[adjustedqaf$att12=="1. Very true"]<- 1
y.nonarab[adjustedqaf$att12=="2. Somewhat true"]<- 2
y.nonarab[adjustedqaf$att12=="3. Somewhat false"]<- 3
y.nonarab[adjustedqaf$att12=="4. Very false"]<- 4

# cultural benefit
y.poscult<- rep(NA, length=nrow(adjustedqaf))
y.poscult[adjustedqaf$att9=="1. Very true"]<- 4
y.poscult[adjustedqaf$att9=="2. Somewhat true"]<- 3
y.poscult[adjustedqaf$att9=="3. Somewhat false"]<- 2
y.poscult[adjustedqaf$att9=="4. Very false"]<- 1

# ADDITIONAL CONTROLS

# gender
gender<-  rep(NA, length=nrow(adjustedqaf))
gender[adjustedqaf$gender=="1.  MALE"]<- 0
gender[adjustedqaf$gender=="2.  FEMALE"]<- 1

# age
age<- 2012-adjustedqaf$birth
age<- ifelse(age <0, NA, age)

# education
education<- rep(NA, length=nrow(adjustedqaf))
education[adjustedqaf$educ1=="10. NEVER ATTENDED SCHOOL"]<- 0
education[adjustedqaf$educ1=="1. Primary"]<- 1
education[adjustedqaf$educ1=="2. Preparatory"]<- 2
education[adjustedqaf$educ1=="3. Vocational"]<- 3
education[adjustedqaf$educ1=="4. Secondary"]<- 4
education[adjustedqaf$educ1=="5. Post secondary"]<- 5
education[adjustedqaf$educ1=="6. University graduate"]<- 6
education[adjustedqaf$educ1=="7. MASTER'S DEGREE"]<- 7
education[adjustedqaf$educ1=="8. Ph.D."]<- 8

# personal income dummy
income.dummy<- rep(NA, length=nrow(adjustedqaf))
income.dummy[adjustedqaf$pincome1=="1. Less than QR20,000"]<- 0
income.dummy[adjustedqaf$pincome1=="2. QR20,000 or more"]<- 1

# personal income complete
income.full<- rep(NA, length(nrow(adjustedqaf)))
income.full[adjustedqaf$pincome2=="1. Less than QR5,000"]<- 0
income.full[adjustedqaf$pincome2=="2. QR5,000 to less than QR10,000"]<- 1
income.full[adjustedqaf$pincome2=="3. QR10,000 to less than QR15,000"]<- 2
income.full[adjustedqaf$pincome2=="4. QR15,000 to less than QR20,000"]<- 3
income.full[adjustedqaf$pincome3=="1. Less than QR25,000"]<- 4
income.full[adjustedqaf$pincome3=="2. QR25,000 to less than QR30,000"]<- 5
income.full[adjustedqaf$pincome3=="3. QR30,000 to less than QR35,000"]<- 6
income.full[adjustedqaf$pincome3=="4. QR35,000 to less than QR45,000"]<- 7
income.full[adjustedqaf$pincome3=="5. QR45,000 or more"]<- 8

# household income dummy
hincome.dummy<- rep(NA, length=nrow(adjustedqaf))
hincome.dummy[adjustedqaf$hincome1=="1. Less than QR30,000"]<- 0
hincome.dummy[adjustedqaf$hincome1=="2. QR30,000 or more"]<- 1

# household income complete
hincome.full<- hincome.dummy
hincome.full[adjustedqaf$hincome1=="1. Less than QR30,000"]<- 0
hincome.full[adjustedqaf$hincome2=="2. QR10,000 to less than QR15,000"]<- 1
hincome.full[adjustedqaf$hincome2=="3. QR15,000 to less than QR20,000"]<- 2
hincome.full[adjustedqaf$hincome2=="4. QR20,000 to less than QR25,000"]<- 3
hincome.full[adjustedqaf$hincome2=="5. QR25,000 to less than QR30,000"]<- 4
hincome.full[adjustedqaf$hincome1=="2. QR30,000 or more"]<- 5
hincome.full[adjustedqaf$hincome3=="2. QR35,000 to less than QR40,000"]<- 6
hincome.full[adjustedqaf$hincome3=="3. QR40,000 to less than QR50,000"]<- 7
hincome.full[adjustedqaf$hincome3=="4. QR50,000 to less than QR60,000"]<- 8
hincome.full[adjustedqaf$hincome3=="5. QR60,000 or more"]<- 9

# employ domestic servants
household<- rep(NA, length(nrow(adjustedqaf)))
household[adjustedqaf$employees == "2.  NO"]<- 0
household[adjustedqaf$employees == "1.  YES"]<- 1

# owns a company
owncompany<- rep(NA, length = nrow(adjustedqaf))
owncompany[adjustedqaf$trading=="1.  YES"]<- 1
owncompany[adjustedqaf$trading=="2.  NO"]<- 0

#foreign contact through education
educ.external<- rep(NA, length=nrow(adjustedqaf))
educ.external[adjustedqaf$educ2=="1. Qatar University"]<- 0
educ.external[adjustedqaf$educ2=="2. Another university or college inside Qatar"]<- 0
educ.external[adjustedqaf$educ2=="3. An Arab University outside Qatar"]<- 1
educ.external[adjustedqaf$educ2=="4. An American University outside Qatar"]<- 1
educ.external[adjustedqaf$educ2=="5. A British University outside Qatar"]<- 1

# work contact
coworkers<- rep(NA, length=nrow(adjustedqaf))
adjustedqaf<- mutate(adjustedqaf,
                     coworkers=as.numeric(cwork1_2=="1. selected"|
                                            cwork1_3=="1. selected"|cwork1_4=="1. selected"|
                                            cwork1_5=="1. selected"|cwork1_6=="1. selected"))
coworkers<- adjustedqaf$coworkers

# coworkers non-arab
coworkers2<- rep(NA, length=nrow(adjustedqaf))
adjustedqaf<- mutate(adjustedqaf,
                     coworkers2=as.numeric(cwork1_3=="1. selected"|cwork1_4=="1. selected"|
                                             cwork1_5=="1. selected"|cwork1_6=="1. selected"))
coworkers2<- adjustedqaf$coworkers2

# non-qatari friends
adjustedqaf<- mutate(adjustedqaf,
                     friends=as.numeric(friends1_2=="1. selected"|friends1_3=="1. selected"|friends1_4=="1. selected"|
                                          friends1_5=="1. selected"|friends1_6=="1. selected"))

friends<- adjustedqaf$friends

# non-qatari friends (non-arab)
adjustedqaf<- mutate(adjustedqaf,
                     friends2=as.numeric(friends1_3=="1. selected"|friends1_4=="1. selected"|
                                           friends1_5=="1. selected"|friends1_6=="1. selected"))

friends2<- adjustedqaf$friends2


#religion
reli<- rep(NA, length=nrow(adjustedqaf))
reli[adjustedqaf$religious=="3. Not religious at all"]<- 0
reli[adjustedqaf$religious=="2. Somewhat religious"]<- 1
reli[adjustedqaf$religious=="1. Very religious"]<- 2




#employment
employ<- rep(NA, length=nrow(adjustedqaf))
employ[adjustedqaf$employ=="1. FULL-TIME EMPLOYED"|adjustedqaf$employ=="2. PART-TIME EMPLOYED"|
         adjustedqaf$employ== "3. UNEMPLOYED, SEEKING WORK"]<- 1
employ[adjustedqaf$employ=="4. UNEMPLOYED, NOT SEEKING WORK"|adjustedqaf$employ=="5. STUDENT"|
         adjustedqaf$employ== "6. HOUSEWIFE"|adjustedqaf$employ=="7. RETIRED"|
         adjustedqaf$employ=="8. UNABLE TO WORK"]<- 0

unemploy<- rep(NA, length=length(employ))
unemploy[adjustedqaf$employ=="3. UNEMPLOYED, SEEKING WORK"]<- 1
unemploy[adjustedqaf$employ=="1. FULL-TIME EMPLOYED"|adjustedqaf$employ=="2. PART-TIME EMPLOYED"]<- 0


#xenophobia
xeno<- rep(NA, length=nrow(adjustedqaf))
xeno[adjustedqaf$influence=="1. Strongly agree"]<- 0
xeno[adjustedqaf$influence=="2. Somewhat agree"]<- 1
xeno[adjustedqaf$influence=="3. Somewhat disagree"]<- 2
xeno[adjustedqaf$influence=="4. Strongly disagree"]<- 3



#ECONOMIC FACTORS
#economic outlook- general
econ.outlook<- rep(NA, length=nrow(adjustedqaf))
econ.outlook[adjustedqaf$econ2=="1. Improve a lot"]<- 4
econ.outlook[adjustedqaf$econ2=="2. Improve a little"]<- 3
econ.outlook[adjustedqaf$econ2=="3. Remain the same"]<- 2
econ.outlook[adjustedqaf$econ2=="4. Worsen a little"]<- 1
econ.outlook[adjustedqaf$econ2=="5. Worsen a lot"]<- 0


# 2 Generate dummies and imputed variables --------------------------------

# create dataset with cleaned variables
df<- tbl_df(as.data.frame(cbind(y.main, y.cult, y.health, 
                                y.jobs, y.nonarab, y.res, y.soc, 
                                age, coworkers, coworkers2,
                                employ, unemploy, household,
                                education, educ.external,
                                friends, friends2, gender, 
                                hincome.full, hincome.dummy, 
                                income.full, income.dummy, 
                                owncompany, reli, y.poscult, xeno,
                                econ.outlook,
                                treat))) %>%
  # remove 1/3 of sample that were not asked outcome questions
  filter(!is.na(treat))


# additional edu and religion dummies
df<- mutate(df, hs = ifelse(education > 2, 1, 0),
            bs = ifelse(education >= 6, 1, 0),
            religious = ifelse(reli > 0, 1, 0),
            vreli = ifelse(reli == 2, 1, 0),
            foreign_college = ifelse(is.na(educ.external), 0, educ.external))


df<- mutate(df, 
            # generate factor version of main outcome
            y.main.fact = factor(y.main, ordered = TRUE), 
            # generate alternate education metrics:
            # bin into 6 categories
            edu_fact = factor(education, ordered = T),
            edu_bin = plyr::mapvalues(education, from = c(0, 1,2,3,4,5,6,7,8), 
                                to = c(1, 2, 3, 4, 4, 5, 5, 6, 6)),
            edu_bin_fact = factor(edu_bin, ordered = T),
            edu_fact_4cat = factor(plyr::mapvalues(education, c(0:8), c(1, 1, 1, 1, 2, 3, 4, 4, 4)),
                                        ordered = T)) %>%
  # impute median for missing values
    mutate(ed_imp = ifelse(is.na(edu_bin), median(edu_bin, na.rm = T), edu_bin),
         age_imp = ifelse(is.na(age), median(age, na.rm = T), age),
         employ_imp = ifelse(is.na(employ), 1, employ),
         unemploy_imp = ifelse(is.na(employ), 0, unemploy),
         reli_imp = ifelse(is.na(reli), 1, reli),
         vreli_imp = ifelse(is.na(vreli), 0, vreli),
         hincome_imp = ifelse(is.na(hincome.full), median(hincome.full, na.rm = T), hincome.full),
         friends_imp = ifelse(is.na(friends), 1, friends))


# 3 Generate figures and tables in main text ------------------------------


####

## Table 1

desc<- df %>%
  # binned education dummies
  mutate(noedu = as.numeric(edu_bin == 1),
         primary = as.numeric(edu_bin == 2),
         prep = as.numeric(edu_bin == 3),
         sec = as.numeric(edu_bin == 4),
         univ = as.numeric(edu_bin == 5),
         postgrad = as.numeric(edu_bin == 6)) %>%
  dplyr::select(gender, age, noedu:postgrad, educ.external,
                hincome.dummy,
                religious, vreli, employ, unemploy,
                coworkers, y.main)
desc<- as.data.frame(desc)

stargazer(desc, iqr = F,
                     covariate.labels = c("Female", "Age", 
                                          "No Schooling", "Primary", "Preparatory",
                                          "Secondary", "University", "Post-graduate",
                                          "Attended University outside Qatar",
                                          "Household Income > QR30,000", "Religious", "Very Religious",
                                          "In Labor Force", "Unemployed", "Has non-Qatari Coworkers",
                                          "Support for Migration"),
                     title = "Summary Statistics",
                     label = "descriptives", 
                     summary.stat = c("n", "mean", "sd", "min", "max"),
                     float = F)

####


####

## Figure 1

greys<- brewer.pal(n = 7, "Greys")[3:7] #there are 9, I exluded the two lighter hues

ggdat<- df %>%
  filter(!is.na(y.main.fact)) %>%
  group_by(treat, y.main.fact) %>% 
  summarise(count=n()) %>%
  mutate(perc=count/sum(count)) %>%
  mutate(treat2 = plyr::mapvalues(treat, from = c(0, 1), to = c("Blue-collar","White-collar"))) %>%
  filter(!is.na(y.main.fact))


## tiff file in greyscale
ggplot(ggdat, aes(x=y.main.fact, y=perc, fill=y.main.fact)) + 
  geom_histogram(stat="identity") + 
  scale_fill_manual(values = greys) +
  theme_bw() +
  facet_grid( ~ treat2)+ 
  coord_flip() + ylab("Fraction of Respondents") + xlab("") + guides(fill=FALSE) +
  scale_x_discrete(breaks=c(-2:2), 
                   labels=c("Reduced a lot","Reduced a little","Remain the same","Increased a little","Increased a lot")) +
  ggtitle("Should number of foreign workers in Qatar be increased?")

####


####

## Table 2

# generate regression models
model1<- (lm(y.main~treat, data = df))

model2<- (lm(y.main~treat + ed_imp + employ_imp + gender + age_imp, data = df))

model3<- (lm(y.main~treat*ed_imp + gender + age_imp, data = filter(df, employ_imp == 1)))

model4<- (lm(y.main~treat*ed_imp + gender + age_imp, data = filter(df, employ_imp == 0)))

model5<- ((lm(y.main ~ treat*ed_imp*employ_imp+ gender + age_imp, data = df)))


# table output
stargazer(model1,  model2, model3, model4,  model5, 
          title="Testing the Labor Market Hypothesis",
          dep.var.labels=c("Support for Migration"), 
          covariate.labels=c("White-collar (WC)","Education (Edu)",
                             "In Labor Force (Labor)","WC x Edu",
                             "WC x Labor",
                             "Edu x Labor","WC x Edu x Labor"), 
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          label = "mainresults",
          omit = c("gender", "age"),
          add.lines = list(c("Labor Force", "","",  "In", "Out", ""),
                           c("Controls", "No", rep("Yes", 4))),
          float = F)  

####


####

## Figure 2

# data and function for generating bootstrapped estimates
mod_df <- df %>% dplyr::select(y.main, treat, employ_imp, ed_imp, age_imp, gender)

bstrap <- function(ests = F){
  # sample observations
  s <- sample(x = 1: nrow(mod_df), size = nrow(mod_df), replace = T)
  d <- mod_df[s,]
  if(ests == F){
    mod <- lm(y.main ~ treat * employ_imp * ed_imp + age_imp + gender, data = d)
  } 
  else{
    mod <- lm(y.main ~ treat * employ_imp * ed_imp + age_imp + gender, data = mod_df)
  }
  # generate predicted measures
  for(i in 1:6){
    assign(paste0("BO", i), coef(mod)[1] + 38 * coef(mod)[5] + coef(mod)[4] * i)
    assign(paste0("BI", i), coef(mod)[1] + 38 * coef(mod)[5] + coef(mod)[4] * i +
             coef(mod)[3] + coef(mod)[9] * i) 
    assign(paste0("WO", i), coef(mod)[1] + 38 * coef(mod)[5] + coef(mod)[4] * i +
             coef(mod)[2] + coef(mod)[8] * i)
    assign(paste0("WI", i), coef(mod)[1] + 38 * coef(mod)[5] + coef(mod)[4] * i +
             coef(mod)[2] + coef(mod)[7] + coef(mod)[8] * i + coef(mod)[3] + coef(mod)[9] * i +
             coef(mod)[10] * i)
  }
  # return bootstrapped predictions
  return(as.numeric(c( BO1, BO2, BO3, BO4, BO5, BO6,  
                       BI1, BI2, BI3, BI4, BI5, BI6, 
                       WO1, WO2, WO3, WO4, WO5, WO6,  
                       WI1, WI2, WI3, WI4, WI5, WI6)))
}

# set seed and run bootstrapping to generate SEs
set.seed(123456)
mat <- replicate(n = 2000, expr = bstrap())
bs_ses <- apply(X = mat, MARGIN = 1, FUN = sd)

# generate predicted values dataframe
preds_df <- data.frame(Treatment = c(rep("Blue-Collar", 12), rep("White-Collar", 12)),
                       Labor_Force = c(rep(c(rep("Out of Labor Force", 6), rep("In Labor Force", 6)), 2)),
                       Education = rep(1:6, 4),
                       Ests = bstrap(ests = T),
                       SE = bs_ses,
                       cilow = apply(mat, 1, FUN = quantile, probs = 0.025),
                       cihi = apply(mat, 1, FUN = quantile, probs = .975)) %>% 
  mutate(ci_up = Ests + 1.96 * SE,
         ci_down = Ests - 1.96 * SE)

# generate figure 2
ggplot(preds_df, aes(x = (Education + .06 * (Treatment == "Blue-Collar") + .12 * (Labor_Force == "In")), 
                         y = Ests, 
                         col = Treatment,
                         lty = Treatment)) + 
  geom_line() + xlab("Education") + 
  facet_grid(~Labor_Force) +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_manual(values= c(greys[5], greys[3])) + 
  ylab("Support for Migration") + 
  geom_errorbar(aes(ymin = cilow, ymax = cihi), width = 0) +
  ylim(c(-1, 1))

####


####

## Figure 3

# function to treatment effects on mediators
med_exp<- function(x, dat = df){
  eq1<- lm(eval(parse(text = x)) ~ treat, data = dat)
  return(c(summary(eq1)$coef[2,1]/abs(summary(eq1)$coef[2,2]), 
           (summary(eq1)$coef[2,1]+1.96*(summary(eq1)$coef[2,2]))/abs(summary(eq1)$coef[2,2]), 
           (summary(eq1)$coef[2,1]-1.96*(summary(eq1)$coef[2,2]))/abs(summary(eq1)$coef[2,2])))
}

# list of potential mediators
allmed<- c("y.cult", "y.nonarab", "y.health", "y.res", "y.soc", "y.jobs")

# group by category
# fiscal burden
fiscal_burden <- c("y.health", "y.res", "y.soc")

# jobs
jobs<- c("y.jobs")

# culture
culture<- c("y.cult", "y.nonarab")

# generate coef estimates + confidence intervals for treatment effect on mediators
mediators_exp<- data_frame(Category = c(rep("Cultural Threat", 2), rep("Fiscal Burden", 3), "Labor Market"),
                           Mediator = allmed,
                           est = sapply(allmed, med_exp)[1,],
                           upper = sapply(allmed, med_exp)[2,],
                           lower = sapply(allmed, med_exp)[3,],
                           labs = c("Threaten Customs", "Threaten Customs \n(Non-Arab)", 
                                    "Strain Health \nServices", "Use Resources", "Use Social Services",
                                    "Take Jobs from \nQataris")) %>%
  mutate(insig = ifelse((upper > 0 & lower > 0), 0, ifelse(upper < 0 & lower < 0, 0, 1)),
         type = "experimental")


# generate mediated effect estimates a_j*b_j
med_all<- function(dat = df){
  d<- dat
  #a_j
  eq1a<- lm(y.health ~ treat, data = d)
  eq1b<- lm(y.res ~ treat, data = d)
  eq1c<- lm(y.soc ~ treat, data = d)
  eq1d<- lm(y.jobs ~ treat, data = d)
  eq1e<- lm(y.cult ~ treat, data = d)
  eq1f<- lm(y.nonarab ~ treat, data = d)
  #b_j
  eq3<- lm(y.main ~  y.health + y.res+ y.soc + y.jobs + y.cult + y.nonarab + treat +
             gender + age_imp + hincome_imp + vreli_imp + employ_imp + ed_imp + friends_imp, data = d)
  #a_j*b_j
  ies<- c(eq1a$coefficients[2] * eq3$coefficients[2], 
          eq1b$coefficients[2] * eq3$coefficients[3],
          eq1c$coefficients[2] * eq3$coefficients[4],
          eq1d$coefficients[2] * eq3$coefficients[5],
          eq1e$coefficients[2] * eq3$coefficients[6],
          eq1f$coefficients[2] * eq3$coefficients[7])
  return(ies)
}

# function to bootstrap SEs for a_j*b_j estimate
bs_med_all<- function(dat = df) {
  s <- sample(x = 1: nrow(dat), size = nrow(dat), replace = T)
  d <- dat[s,]
  eq1a<- lm(y.health ~ treat, data = d)
  eq1b<- lm(y.res ~ treat, data = d)
  eq1c<- lm(y.soc ~ treat, data = d)
  eq1d<- lm(y.jobs ~ treat, data = d)
  eq1e<- lm(y.cult ~ treat, data = d)
  eq1f<- lm(y.nonarab ~ treat, data = d)
  
  eq3<- lm(y.main ~  y.health + y.res+ y.soc + y.jobs + y.cult + y.nonarab + treat+
             gender + age_imp + hincome_imp + vreli_imp + employ_imp + ed_imp + friends_imp, data = d)
  ies<- c(eq1a$coefficients[2] * eq3$coefficients[2], 
          eq1b$coefficients[2] * eq3$coefficients[3],
          eq1c$coefficients[2] * eq3$coefficients[4],
          eq1d$coefficients[2] * eq3$coefficients[5],
          eq1e$coefficients[2] * eq3$coefficients[6],
          eq1f$coefficients[2] * eq3$coefficients[7])
  return(ies)
}

# set seed and generate bootstrapped estimates
set.seed(123456)
bs_ests<- t(replicate(2000, bs_med_all()))

# mediated effect estimates with SE
mediators<- data_frame(Category = c(rep("Fiscal Burden", 3),  "Labor Market", rep("Cultural Threat", 2)), 
                       Mediator = c(fiscal_burden, jobs, culture),
                       est = med_all(),
                       bs_est = apply(bs_ests, 2, mean),
                       bs_se = apply(bs_ests, 2, sd),
                       labs = c("Strain Health Services", "Use Resources", "Use Social Services",
                                "Take Jobs from Qataris",
                                "Threaten Customs", "Threaten Customs \n(Non-Arab)")) %>%
  mutate(upper = apply(bs_ests, 2, quantile, probs = .975),
         lower = apply(bs_ests, 2, quantile, probs = .025),
         insig = ifelse((upper > 0 & lower > 0), 0, ifelse(upper < 0 & lower < 0, 0, 1)),
         type = "mediated")

# labels for plot
labs<- c(experimental = "Treatment effect on mediators", mediated = "Indirect treatment effect on main outcome")
xax<- c("-5" = "-5","0" = "\u2190 More white-collar - More blue-collar \u2192 ", "5" = "5")

# Fig 3 left panel
mediators_exp %>%
  ggplot(aes(x = labs, y = est, ymin = lower, ymax = upper, col = as.factor(insig))) + 
  geom_pointrange() +
  geom_abline(intercept = 0, slope = 0) +
  theme_bw() +
  facet_grid( Category~type, scales = "free", labeller = labeller(type = labs)) +
  xlab("") +  
  ylab(bquote("Standardized coefficient estimates ("*a[j]*")")) +
  theme(axis.text.x  = element_text(size=12),
        axis.text.y  = element_text(size=13),
        strip.text   = element_text(size=13),
        axis.title.x = element_text(size = 15)) +
  scale_y_continuous(breaks=c(-5, 0, 5), labels = xax) +
  guides(color = "none") +
  scale_color_grey(start = 0, end = .6) +
  coord_flip() 

# Fig 3 right panel
mediators  %>% 
  ggplot(aes(x = labs, y = est, ymin = lower, ymax = upper, col = as.factor(insig))) + 
  geom_pointrange() +
  geom_abline(intercept = 0, slope = 0) +
  theme_bw() +
  facet_grid( Category~type, scales = "free", labeller = labeller(type = labs)) +
  xlab("") +
  scale_x_discrete() +
  ylab(bquote("Mediated effect ("*a[j]*b[j]*") on support for migration")) +
  theme(axis.text.x  = element_text(size=12),
        strip.text   = element_text(size=13),
        axis.text.y=element_blank(),
        axis.title.x = element_text(size = 15)) +
  guides(color = "none") +
  scale_color_grey(start = 0, end = .6) +
  coord_flip() 

####


# 4 Generate figures and tables in supplementary materials ----------------


####

## Table A2

# id treatment
treat<- df$treat
# select variables to look at balance
desc2<- df %>% dplyr::select(gender, age, hs, bs, educ.external,
                      hincome.dummy, reli, employ, unemploy,
                      coworkers, friends)

# test balance using xBalance
Balance_Formula <- reformulate(c(names(desc2)), response = "treat")
df_Balance <- xBalance(Balance_Formula, data = desc2, report = "all")

# format table for output
r1<- c("Female", "Age", "Graduated High School", "Graduated College",
       "Attended University outside Qatar",
       "Household Income > QR30,000", "Religiosity",
       "In Labor Force", "Unemployed", "Has non-Qatari Coworkers",
       "Has non-Qatari Friends")
r2<- c("on Age", "on High School", "on College", 
       "on University outside Qatar","on Household Income", "on Religiosity",
       "on Labor Force","on Unemployment",
       "on non-Qatari coworkers","on non-Qatari Friends")
rownames(df_Balance[[1]])[1:11]<-  r1
rownames(df_Balance[[1]])[12:21]<- r2
df_bal<- as.data.frame(df_Balance[[1]])

roundr<- function(x){
  return(formatC( round( x, 2 ), format='f', digits=2 ))
}

df_bal<- df_bal %>%
  mutate_each(funs(roundr)) %>%
  mutate(d = ifelse(p.unstrat <= 0.05, 
                    paste0(adj.diff.unstrat, "* (", 
                           adj.diff.null.sd.unstrat, ")"), 
                    paste0(adj.diff.unstrat, " (", adj.diff.null.sd.unstrat, ")")),
         nam = c(r1, r2))

df_bal_new<- dplyr::select(df_bal, nam, 1:2, 8)
a<- xtable(df_bal_new, align = "llrrr")
colnames(a) <- c("", "Blue-Collar", "White-Collar", "Difference (Std. Dev.)")

addtorow<- NA
addtorow$pos <- list()
addtorow$pos[[1]] <- c(-1)
addtorow$pos[[2]] <- c(11)
addtorow$command <- c('\\\\[-1.8ex]\\hline  \n',
                      '\\hline \n \\textit{Missingness...} & & & \\\\ \n')
temp<- list(addtorow[[2]], addtorow[[3]])

# latex output
print(a, add.to.row = temp, include.rownames=FALSE, floating = F)

####


####

## Table A3

# regression models
a3_model1<- (lm(income.full ~ edu_bin + gender + age, df))
a3_model2<- lm(owncompany ~ edu_bin + gender + age, df)
a3_model3<- lm(y.main ~ edu_bin + gender + age, df)

# latex output
stargazer(a3_model1, a3_model2, a3_model3,
          covariate.labels=c("Education"),
          omit.stat = c("rsq", "f", "ser"),
          label = "mainresults",
          column.labels   =c("Income", "Own Company","Support for Migration"),
          dep.var.labels.include = FALSE,
          model.numbers          = FALSE,
          omit = c("gender", "age", "Constant"),
          add.lines = list(c("Controls", rep("Yes", 4))),
          float = F)  

####


####

## Table A4

# regression models
a4_model1<- lm(xeno ~ edu_bin + gender + age, df)
a4_model2<- (lm(xeno ~ edu_bin + foreign_college + gender + age_imp, df))
a4_model3<- lm(econ.outlook ~ edu_bin + gender + age, df)
a4_model4<- (lm(econ.outlook ~ edu_bin + foreign_college + gender + age_imp, df))

# latex output
stargazer(a4_model1, a4_model2, a4_model3, a4_model4,
          covariate.labels=c("Education", "Attended Foreign College"),
          omit.stat = c("rsq", "f", "ser"),
          label = "mainresults",
          dep.var.labels = c("Foreign Openness", "Economic Outlook"),
          model.numbers = FALSE,
          omit = c("gender", "age", "Constant"),
          add.lines = list(c("Controls", rep("Yes", 4))),
          float = F)  

####


####

## Table A5

# ordered probit models
a5_model1<- polr(y.main.fact ~ treat + gender + age_imp, df, Hess=TRUE,
            na.action=na.omit, method = c("probit"))

a5_model2<- polr(y.main.fact ~ treat*edu_bin + gender + age_imp, df, Hess=TRUE,
            na.action=na.omit, method = c("probit"))

a5_model3<- polr(y.main.fact ~ treat*edu_fact_4cat + gender + age_imp, df, Hess=TRUE,
            na.action=na.omit, method = c("probit"))

a5_model4<- polr(y.main.fact ~ treat*edu_bin + gender + age_imp, filter(df, employ == 1), Hess=TRUE,
            na.action=na.omit, method = c("probit"))

a5_model5<- polr(y.main.fact ~ treat*edu_bin + gender + age_imp, filter(df, employ == 0), Hess=TRUE,
            na.action=na.omit, method = c("probit"))

a5_model6<- polr(y.main.fact ~ treat*edu_bin*employ + gender + age_imp,df, Hess=TRUE,
            na.action=na.omit, method = c("probit"))

# a5 table output
stargazer(a5_model1, a5_model2, a5_model3, a5_model4, a5_model5, a5_model6,
          covariate.labels = c("White-Collar (WC)", "Education (Edu)", "Secondary (Sec)",
                               "Post-secondary (Post)", "BA or higher (BA)",
                               "In Labor Force (Labor)", "WC x Edu", "WC x Sec",
                               "WC x Post", "WC x BA",
                               "WC x Labor", "Edu x Labor",
                               "WC x Edu x Labor"),
          omit = c("gender", "age"),
          dep.var.labels = "Support for Migration",
          add.lines = list(c("Labor Force", "", "", "", "In", "Out", ""),
                           c("Controls", rep("Yes", 6))),
          no.space = T,
          float = F)

####


####

## Table A6

# generate regression models by gender
a6_model1<- (lm(y.main ~ education*employ*treat + age, 
             data = filter(df, gender == 0)))
a6_model2<- (lm(y.main ~ education*employ*treat + age, 
             data = filter(df, gender == 1)))

# table output
stargazer(a6_model1, a6_model2, title="Testing the Labor Market Hypothesis",
          dep.var.labels=c("Support for Migration"), 
          covariate.labels=c("White-collar (WC)","Education (Edu)",
                             "In Labor Force (Labor)","WC x Edu",
                             "WC x Labor",
                             "Edu x Labor","WC x Edu x Labor"
          ), 
          omit.stat = c("rsq", "f", "ser"),
          label = "mainresults",
          omit = c("gender", "age"),
          add.lines = list(c("Gender", "Male","Female"),
                           c("Controls", rep("Yes", 2))),
          float = F)  

####


####

## Table A7

a7_model1<- (lm(y.main~treat, data = filter(df, employ_imp == 1)))

a7_model2<- (lm(y.main~treat+  gender + age_imp +ed_imp , data = filter(df, employ_imp == 1)))

a7_model3<- (lm(y.main~treat, data = filter(df, employ_imp == 0)))

a7_model4<- (lm(y.main~treat+  gender + age_imp +ed_imp , data = filter(df, employ_imp == 0)))


# table output
stargazer(a7_model1, a7_model2, a7_model3, a7_model4,
          title="Testing the Labor Market Hypothesis",
          dep.var.labels=c("Support for Migration"), 
          covariate.labels=c("White-collar (WC)", "Gender", "Age",
                             "Education (Edu)"), 
          omit.stat = c("rsq", "f", "ser"),
          add.lines = list(c("Labor Force", "In","In","Out", "Out")),
          float = F)  

####


####

## Table A8

# generate alternate measures of education
df_edualt<- df %>% mutate(edu_alt = plyr::mapvalues(education, from = c(0:8, NA),
                                               to = c(1, 1, 1, 2, 2, 2, 3, 3, 3, NA)),
                     hs2 = as.numeric(ed_imp >= 4)) 


# models 1-3 : 3 category education measure
a8_model1<- (lm(y.main~treat*edu_alt+ gender + age_imp, data = filter(df_edualt, employ_imp == 1)))

a8_model2<- (lm(y.main~treat*edu_alt + gender + age_imp, data = filter(df_edualt, employ_imp == 0)))

a8_model3<- ((lm(y.main ~ treat*edu_alt*employ_imp+ gender + age_imp, data = df_edualt)))

# models 4-6 : dummy indicator for high school education or higher
a8_model4<- (lm(y.main~treat*hs2+ gender + age_imp, data = filter(df_edualt, employ_imp == 1)))

a8_model5<- (lm(y.main~treat*hs2 + gender + age_imp, data = filter(df_edualt, employ_imp == 0)))

a8_model6<- ((lm(y.main ~ treat*hs2*employ_imp+ gender + age_imp, data = df_edualt)))


stargazer(a8_model1, a8_model2, a8_model3, 
          a8_model4, a8_model5, a8_model6,
          dep.var.labels=c("Support for Migration"), 
          omit = c("gender", "age"),
          order = c(1, 2, 4, 3),
          covariate.labels=c("White-collar (WC)","Education (3-pt)",
                             "Education (High School)", 
                             "In Labor Force (Labor)","WC x Edu-3",
                             "WC x Labor",
                             "Edu-3 x Labor","WC x Edu-3 x Labor",
                             "Edu-HS x Labor",
                             "WC x Edu-HS x Labor",
                             "WC x Edu-HS"), 
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          label = "mainresults",
          add.lines = list(c("Labor Force", "In", "Out", "","In", "Out", ""),
                           c("Controls", rep("Yes", 6))),
          float = F)  

####


####

## Table A9

# helper function to generate regressions by mediator (eqn 1)
med2<- function(x){
  eq1<- lm(eval(parse(text = x)) ~ treat , data = df)
  return(eq1)
}

# eqn 2 - y regressed on mediators + treatment + controls
full<- lm(y.main ~  treat +y.health + y.res+ y.soc + y.jobs + y.cult + y.nonarab + 
            gender + age_imp + hincome_imp + vreli_imp + employ_imp + ed_imp + friends_imp,
          data = df)

# table A9 output
stargazer(sapply(allmed, FUN = med2, simplify = F), full,
          column.labels = c("Customs", "Non-Arab", "Health Services",
                            "Resources", "Social Services", "Take Jobs",
                            "Support for Migration"),
          covariate.labels = c("White-Collar", "Customs",
                               "Non-Arab", "Health Services",
                               "Resources", "Social Services", "Take Jobs"),
          dep.var.labels = c("Equations 1a-1f", "Equation 2"),
          dep.var.caption = "",
          omit.stat = c("rsq", "f", "ser"),
          omit = c("gender", "age", "*_imp"),
          label = "med_analysis", 
          add.lines = list(c("Controls", rep("No", 6), "Yes")),
          float = F)

####


####

## Figure A1

# data for sensitivity analysis - remove missingness on main outcome
sens_dat<- df %>% filter(!is.na(y.main))

# take jobs mediator - eqns 1 and 2
med_jobs1<- lm(y.jobs ~ treat, filter(df, !is.na(y.main)))
med_jobs2<- lm(y.main ~ y.jobs + treat +
           gender + age_imp + hincome_imp + vreli_imp + 
           employ_imp + ed_imp + friends_imp, sens_dat)

# use mediate and medsens to assess sensitivity
medjobs<- mediate(med_jobs1, med_jobs2, sims = 1000, boot = T, treat = "treat", mediator = "y.jobs")
sens_jobs<- medsens(medjobs, rho.by = .05)

# strain health mediator - eqns 1 and 2
med_health1<- lm(y.health ~ treat, filter(df, !is.na(y.main)))
med_health2<- lm(y.main ~ y.health + treat +
           gender + age_imp + hincome_imp + vreli_imp + 
           employ_imp + ed_imp + friends_imp, df)

medhealth<- mediate(med_health1, med_health2, sims = 1000, boot = T, treat = "treat", mediator = "y.health")
sens_health<- medsens(medhealth, rho.by = .05)

# cultural mediator - eqns 1 and 2
med_cult1<- lm(y.cult ~ treat, filter(df, !is.na(y.main)))
med_cult2<- lm(y.main ~ y.cult + treat +
           gender + age_imp + hincome_imp + vreli_imp + 
           employ_imp + ed_imp + friends_imp, df)

medcult<- mediate(med_cult1, med_cult2, sims = 1000, boot = T, treat = "treat", mediator = "y.cult")
sens_cult<- medsens(medcult, rho.by = .05)

# generate figure A1
plot(sens_jobs, main = "Take Jobs from Qataris")
plot(sens_health,  main = "Strain Health Services")
plot(sens_cult, main = "Threaten Customs")

####


####

## Figure A2

# generate split samples based on low versus high education
ed_low<- filter(df, ed_imp <= 4)
ed_high<- filter(df, ed_imp > 4)

# mediator basic effects - low edu
mediators_exp_low<- data_frame(Category = c(rep("Cultural Threat", 2), rep("Fiscal Burden", 3), "Labor Market"),
                               Mediator = allmed,
                               est = sapply(allmed, med_exp, dat = ed_low)[1,],
                               upper = sapply(allmed, med_exp, dat = ed_low)[2,],
                               lower = sapply(allmed, med_exp, dat = ed_low)[3,],
                               labs = c("Threaten Customs", "Threaten Customs \n(Non-Arab)", 
                                        "Strain Health \nServices", "Use Resources", "Use Social Services",
                                        "Take Jobs from \nQataris")) %>%
  mutate(insig = ifelse((upper > 0 & lower > 0), 0, ifelse(upper < 0 & lower < 0, 0, 1)),
         type = "experimental")

# mediator basic effects - high edu
mediators_exp_high<- data_frame(Category = c(rep("Cultural Threat", 2), rep("Fiscal Burden", 3), "Labor Market"),
                                Mediator = allmed,
                                est = sapply(allmed, med_exp, dat = ed_high)[1,],
                                upper = sapply(allmed, med_exp, dat = ed_high)[2,],
                                lower = sapply(allmed, med_exp, dat = ed_high)[3,],
                                labs = c("Threaten Customs", "Threaten Customs \n(Non-Arab)", 
                                         "Strain Health \nServices", "Use Resources", "Use Social Services",
                                         "Take Jobs from \nQataris")) %>%
  mutate(insig = ifelse((upper > 0 & lower > 0), 0, ifelse(upper < 0 & lower < 0, 0, 1)),
         type = "experimental")

# set seed and run bootstrapping for mediated effects
set.seed(123456)
out_low<- t(replicate(2000, bs_med_all(ed_low)))
out_high<- t(replicate(2000, bs_med_all(ed_high)))


mediators_low<- data_frame(Category = c(rep("Fiscal Burden", 3),  "Labor Market", rep("Cultural Threat", 2)), 
                           Mediator = c(fiscal_burden, jobs, culture),
                           est = med_all(ed_low),
                           bs_est = apply(out_low, 2, mean),
                           bs_se = apply(out_low, 2, sd),
                           labs = c("Strain Health Services", "Use Resources", "Use Social Services",
                                    "Take Jobs from Qataris",
                                    "Threaten Customs", "Threaten Customs \n(Non-Arab)")) %>%
  mutate(upper = apply(out_low, 2, quantile, probs = .975),
         lower = apply(out_low, 2, quantile, probs = .025),
         insig = ifelse((upper > 0 & lower > 0), 0, ifelse(upper < 0 & lower < 0, 0, 1)),
         type = "mediated")

mediators_high<- data_frame(Category = c(rep("Fiscal Burden", 3),  "Labor Market", rep("Cultural Threat", 2)), 
                            Mediator = c(fiscal_burden, jobs, culture),
                            est = med_all(ed_high),
                            bs_est = apply(out_high, 2, mean),
                            bs_se = apply(out_high, 2, sd),
                            labs = c("Strain Health Services", "Use Resources", "Use Social Services",
                                     "Take Jobs from Qataris",
                                     "Threaten Customs", "Threaten Customs \n(Non-Arab)")) %>%
  mutate(upper = apply(out_high, 2, quantile, probs = .975),
         lower = apply(out_high, 2, quantile, probs = .025),
         insig = ifelse((upper > 0 & lower > 0), 0, ifelse(upper < 0 & lower < 0, 0, 1)),
         type = "mediated")


# fig A2 top left
mediators_exp_low %>%
  ggplot(aes(x = labs, y = est, ymin = lower, ymax = upper, col = as.factor(insig))) + 
  geom_pointrange() +
  geom_abline(intercept = 0, slope = 0) +
  theme_bw() +
  facet_grid( Category~type, scales = "free", labeller = labeller(type = labs)) +
  xlab("") +  
  ylab(bquote("Standardized coefficient estimates ("*a[j]*")")) +
  theme(axis.text.x  = element_text(size=12),
        axis.text.y  = element_text(size=13),
        strip.text   = element_text(size=13),
        axis.title.x = element_text(size = 15)) +
  scale_y_continuous(breaks=c(-5, 0, 5), labels = xax, limits = c(-8.5, 5.2)) +
  guides(color = "none") +
  scale_color_grey(start = 0, end = .6) +
  coord_flip() 

# fig A2 top right
mediators_low  %>% 
  ggplot(aes(x = labs, y = est, ymin = lower, ymax = upper, col = as.factor(insig))) + 
  geom_pointrange() +
  geom_abline(intercept = 0, slope = 0) +
  theme_bw() +
  facet_grid( Category~type, scales = "free", labeller = labeller(type = labs)) +
  xlab("") +
  scale_x_discrete() +
  ylab(bquote("Mediated effect ("*a[j]*b[j]*") on support for migration")) +
  theme(axis.text.x  = element_text(size=12),
        strip.text   = element_text(size=13),
        axis.text.y=element_blank(),
        axis.title.x = element_text(size = 15)) +
  guides(color = "none") +
  scale_color_grey(start = 0, end = .6) +
  ylim(c(-.12, .07)) +
  coord_flip() 

# fig A2 bottom left
mediators_exp_high %>%
  ggplot(aes(x = labs, y = est, ymin = lower, ymax = upper, col = as.factor(insig))) + 
  geom_pointrange() +
  geom_abline(intercept = 0, slope = 0) +
  theme_bw() +
  facet_grid( Category~type, scales = "free", labeller = labeller(type = labs)) +
  xlab("") +  
  ylab(bquote("Standardized coefficient estimates ("*a[j]*")")) +
  theme(axis.text.x  = element_text(size=12),
        axis.text.y  = element_text(size=13),
        strip.text   = element_text(size=13),
        axis.title.x = element_text(size = 15)) +
  scale_y_continuous(breaks=c(-5, 0, 5), labels = xax, limits = c(-8.5, 5.2)) +
  guides(color = "none") +
  scale_color_grey(start = 0, end = .6) +
  coord_flip() 

# fig A2 bottom right
mediators_high  %>% 
  ggplot(aes(x = labs, y = est, ymin = lower, ymax = upper, col = as.factor(insig))) + 
  geom_pointrange() +
  geom_abline(intercept = 0, slope = 0) +
  theme_bw() +
  facet_grid( Category~type, scales = "free", labeller = labeller(type = labs)) +
  xlab("") +
  scale_x_discrete() +
  ylab(bquote("Mediated effect ("*a[j]*b[j]*") on support for migration")) +
  theme(axis.text.x  = element_text(size=12),
        strip.text   = element_text(size=13),
        axis.text.y=element_blank(),
        axis.title.x = element_text(size = 15)) +
  guides(color = "none") +
  scale_color_grey(start = 0, end = .6) +
  ylim(c(-.12, .07)) +
  coord_flip() 

####

